The data for this dashboard project comes from two sources, https://www.kaggle.com/russellyates88/suicide-rates-overview-1985-to-2016 and https://apps.who.int/iris/bitstream/handle/10665/258814/WHO-MSD-MER-17.5-eng.pdf?sequence=1.
The visualizations are created to see the situation of suicide around the world between 1985 and 2016. From the exploratory visualizations, we can see that males are more likely to suicide than females, and there is no clear correlation between GDP and suicide rate. One more thing is that, people from generation boomers are more likely to suicide than people from other generations.
The whole project was done in Rstudio, and the packages of tidyverse, ggpplot2, igraph, ggraph, ggcorrplot, rworldmap, circlize, viridis, tidytext, widyr, tidyr, and dplyr are used.
---
title: "Suicide Rates Compares Socio-economic"
output:
flexdashboard::flex_dashboard:
source_code: embed
---
```{r setup, include=FALSE}
library(flexdashboard)
library(reticulate)
```
About
=======================================================================
The data for this dashboard project comes from two sources, https://www.kaggle.com/russellyates88/suicide-rates-overview-1985-to-2016 and
https://apps.who.int/iris/bitstream/handle/10665/258814/WHO-MSD-MER-17.5-eng.pdf?sequence=1.
The visualizations are created to see the situation of suicide around the world between 1985 and 2016. From the exploratory visualizations, we can see that males are more likely to suicide than females, and there is no clear correlation between GDP and suicide rate. One more thing is that, people from generation boomers are more likely to suicide than people from other generations.
The whole project was done in Rstudio, and the packages of tidyverse, ggpplot2, igraph, ggraph, ggcorrplot, rworldmap, circlize, viridis, tidytext, widyr, tidyr, and dplyr are used.
Exploratory
=======================================================================
Column {data-width=500}
-----------------------------------------------------------------------
### Gender vs Country suicide rate
```{r, include=FALSE}
library(tidyverse)
library(ggplot2)
library(igraph)
library(ggraph)
data <- read.csv('master.csv', encoding = 'UTF-8')
```
```{r, fig.height = 12, fig.width = 10}
data %>% group_by(X.U.FEFF.country, sex) %>% summarise(suicide_rate = mean(suicides_no), population = mean(population)) %>%
mutate(suicide_rate = suicide_rate / population) %>% arrange(suicide_rate) %>%
ggplot() + geom_bar(aes(reorder(X.U.FEFF.country, suicide_rate), suicide_rate, fill = suicide_rate, color = sex), stat='identity') +
coord_flip() + labs(x="country")
```
Column {data-width=500}
-----------------------------------------------------------------------
### Correlations of factors
```{r, include=FALSE}
library(ggcorrplot)
c <- data[, c(2, 5, 6, 7, 11)]
c <- sapply(c, as.numeric)
corr <- cor(c)
```
```{r, fig.height = 3, fig.width = 5}
ggcorrplot(corr, method = "circle", hc.order = TRUE, outline.col = "white")
```
### Percentage of suicide per generation
```{r, fig.height = 5, fig.width = 8}
a <- data %>% group_by(generation) %>% summarise(suicide_rate = sum(suicides_no)) %>% mutate(prop = suicide_rate / sum(suicide_rate) * 100) %>% arrange(desc(prop))
pie(a$prop, labels = paste(a$generation, sep = " ", round(a$prop, 2), "%"), col = c("purple", "violetred1", "green3", "cornsilk", "cyan", "white"))
```
Map
=======================================================================
Column {.tabset}
-------------------------------------
### suicide rate map in 1992
```{r, include=FALSE}
library(rworldmap)
m <- data %>% group_by(X.U.FEFF.country, year) %>% summarise(suicide_rate = sum(suicides_no), population = sum(population)) %>% mutate(suicide_rate = suicide_rate / population * 100)
m <- m[c('X.U.FEFF.country', 'year', 'suicide_rate')]
m92 <- m %>% filter(year == 1992)
m92
Map92 <- joinCountryData2Map(m92, joinCode = "NAME", nameJoinColumn = "X.U.FEFF.country")
```
```{r}
mapParams <- mapCountryData(Map92,
nameColumnToPlot="suicide_rate",
oceanCol = "azure2",
catMethod = "continious",
missingCountryCol = gray(.8),
colourPalette = c("coral",
"coral2",
"coral3", "orangered",
"orangered3", "orangered4"),
addLegend = T,
mapTitle = "",
border = NA)
```
### suicide rate map in 1997
```{r, include=FALSE}
m97 <- m %>% filter(year == 1997)
Map97 <- joinCountryData2Map(m97, joinCode = "NAME", nameJoinColumn = "X.U.FEFF.country")
```
```{r}
mapParams <- mapCountryData(Map97,
nameColumnToPlot="suicide_rate",
oceanCol = "azure2",
catMethod = "continious",
missingCountryCol = gray(.8),
colourPalette = c("coral",
"coral2",
"coral3", "orangered",
"orangered3", "orangered4"),
addLegend = T,
mapTitle = "",
border = NA)
```
### suicide rate map in 2002
```{r, include=FALSE}
m02 <- m %>% filter(year == 2002)
Map02 <- joinCountryData2Map(m02, joinCode = "NAME", nameJoinColumn = "X.U.FEFF.country")
```
```{r}
mapParams <- mapCountryData(Map02,
nameColumnToPlot="suicide_rate",
oceanCol = "azure2",
catMethod = "continious",
missingCountryCol = gray(.8),
colourPalette = c("coral",
"coral2",
"coral3", "orangered",
"orangered3", "orangered4"),
addLegend = T,
mapTitle = "",
border = NA)
```
### suicide rate map in 2007
```{r, include=FALSE}
m07 <- m %>% filter(year == 2007)
Map07 <- joinCountryData2Map(m07, joinCode = "NAME", nameJoinColumn = "X.U.FEFF.country")
```
```{r}
mapParams <- mapCountryData(Map07,
nameColumnToPlot="suicide_rate",
oceanCol = "azure2",
catMethod = "continious",
missingCountryCol = gray(.8),
colourPalette = c("coral",
"coral2",
"coral3", "orangered",
"orangered3", "orangered4"),
addLegend = T,
mapTitle = "",
border = NA)
```
### suicide rate map in 2012
```{r, include=FALSE}
m12 <- m %>% filter(year == 2012)
Map12 <- joinCountryData2Map(m12, joinCode = "NAME", nameJoinColumn = "X.U.FEFF.country")
```
```{r}
mapParams <- mapCountryData(Map12,
nameColumnToPlot="suicide_rate",
oceanCol = "azure2",
catMethod = "continious",
missingCountryCol = gray(.8),
colourPalette = c("coral",
"coral2",
"coral3", "orangered",
"orangered3", "orangered4"),
addLegend = T,
mapTitle = "",
border = NA)
```
More {data-orientation=rows}
=======================================================================
Row{ .tabset}
-----------------------------------------------------------------------
### Trend of ages
```{r, fig.height = 6, fig.width = 15}
data %>% group_by(year, age) %>% summarise(suicide_rate = sum(suicides_no)) %>%
ggplot(aes(year, suicide_rate, color = age)) + geom_point(aes(shape = age)) + geom_line() + theme_bw()
```
### Networks between generation and countries of top 10 suicide rate
```{r, include=FALSE}
top <- data %>% group_by(X.U.FEFF.country, year) %>% summarise(suicide_rate = sum(suicides_no), population = sum(population)) %>% mutate(suicide_rate = suicide_rate/population*100000)
top <- top %>% group_by(X.U.FEFF.country) %>% summarise(suicide_rate = mean(suicide_rate)) %>% arrange(desc(suicide_rate))
rate <- data[data$X.U.FEFF.country %in% top$X.U.FEFF.country[1:10], ]
rate <- rate %>% group_by(X.U.FEFF.country, generation) %>% summarise(suicide_rate = mean(suicides_no), population = mean(population)) %>% mutate(suicide_rate = suicide_rate/population*100000)
library(circlize)
library(viridis)
colnames(rate) <- c('source', 'target', 'count', '')
rate <- rate[c('source', 'target', 'count')]
rate$count <- ceiling(rate$count)*1000
```
```{r, fig.height = 5, fig.width = 5}
mycolor <- viridis(16, alpha = 1, begin = 0, end = 1, option = "D")
mycolor <- mycolor[sample(1:16)]
chordDiagram(rate,
grid.col = mycolor,
transparency = 0.25,
directional = 1,
direction.type = c("arrows", "diffHeight"),
diffHeight = -mm_h(2),
annotationTrack = "grid",
annotationTrackHeight = c(0.05, 0.05),
link.arr.type = "big.arrow",
link.sort = TRUE,
link.largest.ontop = TRUE)
circos.trackPlotRegion(
track.index = 1,
bg.border = NA,
panel.fun = function(x, y) {
xlim = get.cell.meta.data("xlim")
sector.index = get.cell.meta.data("sector.index")
# Add names to the sector.
circos.text(
x = mean(xlim),
y = 2,
labels = sector.index,
facing = "bending",
niceFacing = TRUE,
adj = c(0, 0.5),
cex = 0.6
)
}
)
```
### National suicide prevention strategies
```{r, include=FALSE}
library(tidytext)
library(widyr)
library(tidyr)
library(dplyr)
# convert the PDF file into a text file
lapply('National_suicide_prevention_strategies.pdf', function(i) system(paste('"xpdf/bin64/pdftotext.exe"', paste0('"', i, '"')), wait = FALSE) )
```
```{r, include=FALSE}
# read the txt file
df <- lapply("National_suicide_prevention_strategies.txt", function(x) {
tmp <- try(read.delim(x, encoding = 'utf-8'))
if (!inherits(tmp, 'try-error')) tmp
})
#df <- read.delim("National_suicide_prevention_strategies.txt", header = FALSE)
```
```{r, include=FALSE}
# select the context without reference
df <- df[[1]][[1]]
df <- df[96:576]
t1 <- tibble(as.character(1:481), df)
colnames(t1) <- c('id', 'prevention')
```
```{r, include=FALSE}
t1 <- t1 %>%
unnest_tokens(word, prevention) %>%
anti_join(stop_words)
my_stopwords <- tibble(word = c(as.character(1:3000), 'behaviours', 'people', 'behaviour', 'prevention'))
t1 <- t1 %>% anti_join(my_stopwords)
t1 <- data.frame(t1)
```
```{r, include=FALSE}
# count how many times each pair of words occurs together in a paragraph
word_pairs1 <- t1 %>%
pairwise_count(word, id, sort = TRUE, upper = FALSE)
```
```{r, fig.height = 5, fig.width = 8}
# plot networks of the co-occurring words
word_pairs1 %>%
filter(n >= 15) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = n, edge_width = n), edge_colour = "blue") +
geom_node_point(size = 5) +
geom_node_text(aes(label = name), repel = TRUE,
point.padding = unit(0.2, "lines")) +
theme_void()
```